import datetime
import os
import openpyxl
from bs4 import BeautifulSoup
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

from gitee_config import OUTPUT_DIRS_DEFAULT
from spider import spider

def __get_gitee_repo_forks(classify, repos):
    """Fetch every repository page and splice its social counters into the record.

    Args:
        classify: Unused by this function; kept so existing callers still work.
        repos: Iterable of indexable records where ``repo[0]`` and ``repo[1]``
            form the ``repo[1] + '/' + repo[0]`` key and ``repo[4]`` is the
            link that gets fetched.

    Returns:
        A list of tuples. Each tuple is the original record with five integers
        inserted at index 4, in this order: watchers, stars, forks, issues,
        pull requests. Records whose page did not answer with HTTP 200 are
        omitted, and identical result tuples are only kept once.
    """
    tasks = []
    # Index the records by their flag once instead of rescanning ``repos``
    # for every response.
    repos_by_flag = {}
    for repo in repos:
        flag = repo[1] + '/' + repo[0]
        tasks.append((repo[4], flag))
        repos_by_flag.setdefault(flag, []).append(repo)

    repo_forks = []
    for flag, response in spider(1000, tasks, 0):
        if response.status_code != 200:
            # Report the flag that actually failed, not a leftover loop variable.
            print("ERR:Failed to connect gitee forks " + flag)
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        actions = soup.find('div', class_='git-project-header-actions')
        watcher_button = actions.find('span', class_='ui buttons basic watch-container')
        watchers = watcher_button.find('a', class_='ui button action-social-count').get('title')
        star_button = actions.find('span', class_='basic buttons star-container ui')
        stars = star_button.find('a', class_='ui button action-social-count').get('title')
        fork_button = actions.find('span', class_='ui basic buttons fork-container')
        forks = fork_button.find('a', class_='ui button action-social-count disabled-style').get('title')

        # The first two circular labels of the menu are read as the issue and
        # pull-request counts; a missing menu or label falls back to '0'.
        menu = soup.find('div', class_='ui secondary pointing menu')
        try:
            issues = menu.find_all('span', class_='ui mini circular label')[0].text.replace('\n', '')
        except (AttributeError, IndexError):
            issues = '0'
        try:
            pull_requests = menu.find_all('span', class_='ui mini circular label')[1].text.replace('\n', '')
        except (AttributeError, IndexError):
            pull_requests = '0'

        matches = repos_by_flag.get(flag)
        if not matches:
            continue
        counters = [int(watchers), int(stars), int(forks), int(issues), int(pull_requests)]
        for repo in matches:
            fields = list(repo)
            # Slice assignment inserts all five counters at index 4 in order.
            fields[4:4] = counters
            repo_new = tuple(fields)
            if repo_new not in repo_forks:
                repo_forks.append(repo_new)
    return repo_forks


def __get_gitee_repos_in_class(classify, url, page_list):
    """Collect the repositories listed on every explore page of one category.

    Args:
        classify: Category name; only ``'C'`` and ``'C++'`` are supported.
        url: Base listing URL; ``'&page=<n>'`` is appended for each page.
        page_list: Indexable page counts, ``page_list[0]`` for ``'C'`` and
            ``page_list[1]`` for ``'C++'``.

    Returns:
        A list of tuples ``(repo_name, owner_name, system, language, url,
        update_time, description)``, one per repository item found on the
        pages that answered with HTTP 200. ``language`` is ``None`` when the
        item has no language link.

    Raises:
        ValueError: If ``classify`` is not one of the supported categories.
    """
    if classify == 'C':
        total_page = page_list[0]
    elif classify == 'C++':
        total_page = page_list[1]
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError("Unsupported classify for page lookup: %r" % (classify,))

    # The flag handed to the spider stays a one-element list, as before.
    flag = [classify]
    tasks = [(url + '&page=' + str(page), flag)
             for page in range(1, int(total_page) + 1)]
    result = spider(1000, tasks, 0)
    repos = []
    print(result)
    for _flag, response in result:
        if response.status_code != 200:
            print("ERR:Failed to connect gitee repos")
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        listing = soup.find('div', class_='explore-projects__detail-list')
        detail = listing.find('div', class_='ui relaxed divided items explore-repo__list')
        for entry in detail.find_all('div', class_='item'):
            item = entry.find('div', class_='content')
            description = item.find('div', class_='project-desc mb-1').get('title')
            anchor = item.find('div', class_='project-title').find('h3').find('a')
            repo_url = 'https://gitee.com' + anchor.get('href')
            name_parts = anchor.get('title').split('/')
            owner_name = name_parts[0]
            repo_name = name_parts[1]
            bottom = item.find('div', class_='left d-align-center')
            update_time = bottom.find(
                'div', class_='text-muted project-item-bottom__item d-flex-center').get('title')

            links = bottom.find_all('a', class_='project-item-bottom__item')
            language_link = bottom.find('a', class_='project-language project-item-bottom__item')
            if len(links) > 1 and language_link is not None:
                # Both links are present: the second one is taken as the system.
                system = links[1].get('title')
                language = language_link.get('title')
            else:
                # Without a usable language link the first link is the system.
                language = None
                system = links[0].get('title')

            repos.append((repo_name, owner_name, system, language,
                          repo_url, update_time, description))
    return repos

def get_gitee_repos_all(classify_list, page_list, save_file=None):
    """Scrape every category and write the repositories to an Excel workbook.

    One worksheet is created per category (``'/'`` in the category name is
    replaced by ``'&'`` for the sheet title), with a header row followed by
    one row per repository.

    Args:
        classify_list: Iterable of ``(classify, url)`` pairs.
        page_list: Page counts forwarded to ``__get_gitee_repos_in_class``.
        save_file: Target ``.xlsx`` path. When falsy, the workbook is saved as
            ``<OUTPUT_DIRS_DEFAULT>/<year>-<month>-<day>/repos_in_gitee.xlsx``
            and the directories are created as needed.
    """
    if not save_file:
        now = datetime.datetime.now()
        # Unpadded year-month-day, e.g. "2024-1-5".
        date_time = "%s-%s-%s" % (now.year, now.month, now.day)
        work_dir = os.path.join(OUTPUT_DIRS_DEFAULT, date_time)
        # makedirs creates missing parents and tolerates an existing directory,
        # which avoids the exists()/mkdir() race.
        os.makedirs(work_dir, exist_ok=True)
        save_file = os.path.join(work_dir, "repos_in_gitee.xlsx")

    headers = (
        "repo name", "owner name", "sub_system", "language", "watcher",
        "stars", "forks", "issues", "PR", "url", "last-update", "description",
    )
    last_col = len(headers)

    wb = openpyxl.Workbook()
    # Drop the default sheet so only the per-category sheets remain.
    wb.remove(wb.active)
    for classify, url in classify_list:
        sheet = wb.create_sheet(classify.replace('/', '&'))
        repos = __get_gitee_repos_in_class(classify, url, page_list)
        repos_forks = __get_gitee_repo_forks(classify, repos)
        print(repos_forks)

        for col, title in enumerate(headers, start=1):
            sheet.cell(1, col, title)

        for row, repo in enumerate(repos_forks, start=2):
            for col in range(1, last_col):
                value = repo[col - 1]
                if isinstance(value, str):
                    # Strip characters openpyxl refuses to store, so a single
                    # control character cannot abort the whole export.
                    value = ILLEGAL_CHARACTERS_RE.sub('', value)
                sheet.cell(row, col, value)
            # The description is always written as text (None becomes 'None').
            sheet.cell(row, last_col).value = ILLEGAL_CHARACTERS_RE.sub(
                r'', str(repo[last_col - 1]))
    wb.save(save_file)

if __name__ == "__main__":
    # NOTE(review): get_gitee_repos_all() declares classify_list and page_list
    # as required parameters, so this argument-less call raises TypeError when
    # the file is run as a script. Supply the intended arguments here.
    get_gitee_repos_all()
